#!/usr/bin/perl
use strict;
#$file = "C:\\yahooq\\2115500137.txt";
#@files = `dir/b C:\\yahooq`;
# Directory that receives the generated dictionary file.
my $mobileDicDir = "C:\\DOCUME~1\\Administrator\\workspace\\smsindex\\data\\dic";
#my $dataDir = "C:\\DOCUME~1\\Administrator\\workspace\\smsindex\\data\\mobileFAQ";
#my $dicFile = $mobileDicDir."\\mobileDic";
# Directory of input text files, and the dictionary file built from them.
my $dataDir ="C:\\DOCUME~1\\Administrator\\workspace\\smsindex\\data\\Yahoo_questions";
my $dicFile = $mobileDicDir."\\yahoo_dic";

# List the bare entry names in $dataDir via the Windows `dir /b` command.
# Backticks return each line with its line ending still attached; strip it so
# the names can be safely interpolated into paths and command lines later on.
my @files = `dir/b $dataDir`;
warn "Listing $dataDir failed (exit status $?)\n" if $? != 0;
s/[\r\n]+\z// for @files;

# Echo the file names, one per line, as a progress/debug aid.
print "$_\n" for @files;
 

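# Disabled one-off cleanup pass, kept for reference inside a POD block.
# It rewrote each data file in place: lines without any word character were
# dropped, every character other than word characters, whitespace, '.' and '?'
# was replaced by a space, and the text was lower-cased.
# NOTE: it refers to $mobileDataDir, which is not declared in this script, so
# it will not compile under "use strict" if re-enabled as-is.

=pod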
foreach my $file (@files){
	my $filepath = $mobileDataDir."\\".$file;
	open(FILE, $filepath)|| die();
	my @array = ();
	@array = <FILE>;
	close(FILE);
	open(FILE,">".$mobileDataDir."\\".$file)||die();
	foreach (@array){
		if($_=~/[\w]/){
			chomp($_);
			$_ =~ s/[^\w\d\s\.\?]/ /ig;
			print FILE lc($_);
			print FILE "\n";
		}
	}
	print $file."\n";
	close(FILE);
}
=cut
#=pod

# Pass 1: run text2ngram (-n1) over every input file and write the resulting
# entries to $dicFile, one output line per input file with the entries
# separated by single spaces.
# Uses a lexical handle with three-argument open and checks the result, so a
# failure to create the dictionary file is reported instead of silently
# discarding all output.
open(my $dic_out, '>', $dicFile) or die "Cannot open $dicFile for writing: $!";
foreach my $file (@files){
	# Work on a copy with any line ending removed: names coming straight from
	# backticks still carry "\n", which must not end up inside the command.
	(my $name = $file) =~ s/[\r\n]+\z//;
	#@myarray = `C:\\ngramtool\\text2ngram.exe -n1 C:\\yahooq\\$file`;
	# The path is quoted so that file names containing spaces stay one argument.
	my @myarray = `C:\\ngramtool\\text2ngram.exe -n1 "$dataDir\\$name"`;
	warn "text2ngram reported exit status $? for $name\n" if $? != 0;
	#print @myarray;
	foreach my $word (@myarray){
		chomp($word);
		# Drop a trailing whitespace-separated number from the line
		# (presumably the count text2ngram prints after each entry -- confirm).
		$word =~ s/[\s]+[\d]+[\s]*$//ig;
		print {$dic_out} $word." ";
	}
	print {$dic_out} "\n";

}
# Buffered write errors only surface at close time, so check it.
close($dic_out) or die "Cannot close $dicFile: $!";

# Pass 2: run text2ngram (-n1) over the intermediate dictionary file and
# rewrite $dicFile with one entry per line.
# The backticks finish (and the tool's whole output is in memory) before the
# file is reopened for writing, so reading and overwriting the same path is safe.
my @myarray = `C:\\ngramtool\\text2ngram.exe -n1 "$dicFile"`;
warn "text2ngram reported exit status $? for $dicFile\n" if $? != 0;

# Lexical handle, three-argument open, and an explicit failure check so an
# unwritable dictionary file is reported rather than silently ignored.
open(my $dic_final, '>', $dicFile) or die "Cannot open $dicFile for writing: $!";
foreach my $word (@myarray){
		chomp($word);
		# Drop a trailing whitespace-separated number from the line
		# (presumably the count text2ngram prints after each entry -- confirm).
		$word =~ s/[\s]+[\d]+[\s]*$//ig;
		print {$dic_final} $word."\n";
}
# Buffered write errors only surface at close time, so check it.
close($dic_final) or die "Cannot close $dicFile: $!";
#=cut